#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed May  4 13:07:11 2022

@author: cythnia
"""

import time

import numpy as np
import pandas as pd
import requests
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Search-result pages to scrape: one URL per value of the `currentPage` query
# parameter (0 through 6). The `key` parameter is the URL-encoded search
# keyword "遗传咨询师" (genetic counselor).
url = ['https://www.liepin.com/zhaopin/?headId=3a0769efa756315bfdcdcd2044e4b2cc&ckId=f58hmvbsxzs4j1b317ifknqtidcgjvzb&oldCkId=3a0769efa756315bfdcdcd2044e4b2cc&fkId=ved7wcl7tfn6fx2weoopy5s73dzpjhv9&skId=ved7wcl7tfn6fx2weoopy5s73dzpjhv9&sfrom=search_job_pc&key=%E9%81%97%E4%BC%A0%E5%92%A8%E8%AF%A2%E5%B8%88&currentPage={}&scene=page'.format(i) for i in range(0, 7)]

# Request headers. NOTE(review): nothing in the visible script passes these to
# the Selenium driver or to `requests`; kept so the module-level name survives.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36'
    }

# Accumulates one [company, title, location, salary, experience, education,
# link] row per scraped job card, across both scraping passes.
lis = []

# CSS path shared by every per-card selector used below.
CARD_SELECTOR = ('body > div > div > section.content-left-section > div > ul > li'
                 ' > div > div > div.job-card-left-box > div')


def build_selectors(anchor, company_tail):
    """Return the seven CSS selectors used for one scraping pass.

    Args:
        anchor: Selector fragment for the job link inside a card, e.g. ``'a'``
            or ``'a:nth-child(1)'``.
        company_tail: Selector fragment (starting with ``' > '``) appended to
            ``CARD_SELECTOR`` to locate the company name.

    Returns:
        A tuple of selectors in column order: company, job title, location,
        salary, experience requirement, education requirement, job link.
    """
    link = CARD_SELECTOR + ' > ' + anchor
    header = link + ' > div.job-detail-header-box'
    labels = link + ' > div.job-labels-box'
    return (
        CARD_SELECTOR + company_tail,
        header + ' > div > div.ellipsis-1',
        header + ' > div > div.job-dq-box > span.ellipsis-1',
        header + ' > span',
        labels + ' > span:nth-child(1)',
        labels + ' > span:nth-child(2)',
        link,
    )


def scrape_pass(driver, pages, selectors, rows, suffix=''):
    """Load every page in *pages* and append the scraped job rows to *rows*.

    Args:
        driver: Selenium WebDriver used to load pages and locate elements.
        pages: Iterable of page URLs, visited in order.
        selectors: Seven CSS selectors as returned by ``build_selectors``.
        rows: List that receives one 7-item list per job; mutated in place.
        suffix: Text appended to the two progress messages printed per page.
    """
    # The page number comes from enumerate(): the original code printed
    # `i + 1`, but `i` only existed inside the list comprehension that builds
    # `url`, which raises NameError on Python 3.
    for index, page_url in enumerate(pages):
        driver.get(page_url)
        # implicitly_wait() does not pause; it sets how long the element
        # lookups below keep polling for a match (5, 6 or 7 seconds here).
        driver.implicitly_wait(np.random.randint(5, 8))
        # find_elements(By.CSS_SELECTOR, ...) replaces
        # find_elements_by_css_selector, which Selenium 4.3 removed.
        columns = [driver.find_elements(By.CSS_SELECTOR, css) for css in selectors]
        print('已完成爬取第' + str(index + 1) + '页' + suffix)
        # NOTE(review): zip() pairs the columns by position and stops at the
        # shortest one, so a card missing any field can shift or drop rows.
        for *fields, link in zip(*columns):
            rows.append([field.text for field in fields] + [link.get_attribute('href')])
        # Random 5-7 second pause between page loads.
        time.sleep(np.random.randint(5, 8))
        print('已汇总完成第' + str(index + 1) + '页' + suffix)


if __name__ == "__main__":
    # Start the browser that drives the scrape.
    driver = webdriver.Chrome()
    try:
        # Pass 1: cards whose company name is reached through plain <div>s.
        scrape_pass(driver, url, build_selectors('a', ' > div > div > span'), lis)
        # Pass 2: cards whose company name sits inside the card's second link
        # (the original comment calls these "companies that have a name").
        scrape_pass(driver, url,
                    build_selectors('a:nth-child(1)', ' > a:nth-child(2) > div > div > span'),
                    lis, '，有名称公司')
    finally:
        # Always close the browser, even if a page fails to load or parse.
        driver.quit()
    print('爬取完成所有页')
    # Columns: company, job title, location, salary, experience, education, link.
    result = pd.DataFrame(lis, columns=['公司名称', '岗位名称', '工作地点', '薪资待遇', '经验要求', '学历要求', '招聘链接'])
    result.to_excel('/Users/cythnia/Desktop/遗传咨询师招聘信息.xlsx', index=False)
